'''


exec(''.join(open("/home/dalton_m/ppp/pyfiles/CRppp_cntynaicsv1.py", encoding="utf8").readlines()[:]))
nohup python3 /home/dalton_m/ppp/pyfiles/CRppp_cntynaicsv1.py  | tee &
'''


import os

import pandas as pd

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import sys

sys.path.append('/home/dalton_m/payload')
from basicfunctions import *

filename = 'CRppp_cntynaicsv1_2019'

resultsloc1 = "/home/dalton_m/ppp/"

from datetime import date
datestr = date.today().strftime(format="%Y%m%d")
resultsloc = resultsloc1 + datestr + '/'
if not os.path.exists(resultsloc):
    os.makedirs(resultsloc)


logging.basicConfig(filename=resultsloc + filename + '.txt', level=logging.ERROR,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logging.debug('This message should go to the log file')
logging.info('So should this')
logging.warning('And this, too')
logging.exception('And this, too')
logging.captureWarnings(True)

sys.path.append(resultsloc1 + 'pyfiles/')

#the quarter data is available
q=8
moncount = q*3 + 1

'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
1) get post-2019 LDBs from employment file
2) get 2019 qcew data and keep ldbs from 1)
3) merge 2019 zip code
4) get monthly employment by zip code and naics 4 digit
5) load 1) again, load 2020, then 2021 zip code data and merge
6) post-2019 employment by zip- by month
- wages
-- same thing

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
1) get post-2019 LDBs from employment file
************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
df1 = cudf.read_csv(dataloc + 'pppfiles/' + 'ANppp_ldbv1' + '_emp.csv')
ldblist = df1['ldb_num'].to_pandas().unique().tolist()
df1 = None
'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
2) get 2019 qcew data and keep ldbs from 1)
3) merge 2019 zip code
4) get monthly employment by zip code and naics 4 digit

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
######get prev year seasonal emp data
yrm1 = 2017
fips = ''
qtr = ''
own = 5
# 'ui_acct', 'rpt_unit',
cols = ['aaemp', 'meei', 'naics_code']
dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
                                                         'rpt_unit': 'run_num',
                                                         'naics_code': 'naics_ldb'})
dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
##geography
dfg = cudf.read_csv(dataloc + 'ldb/address/' + 'ldbaddressesv1_2017.psv', sep='|')[['ldb_num','cnty', 'fipsstate']].dropna().drop_duplicates(subset='ldb_num', keep = 'first')
dfg['fipscnty'] = dfg['fipsstate']*1000 + dfg['cnty']
dfg = cudf.concat([dfg[['ldb_num', 'fipscnty']], cudf.read_csv(dataloc + 'ldb/newgeog' + '20' + '.psv', sep='|')[['ldb_num', 'cnty']].dropna().rename(columns = {'cnty' : 'fipscnty'})]).drop_duplicates(subset='ldb_num', keep = 'first')

#merge
kcols = ['ldb_num', 'emp_m1', 'emp_m2', 'emp_m3','emp_m4', 'emp_m5', 'emp_m6',
 'emp_m7', 'emp_m8', 'emp_m9', 'emp_m10', 'emp_m11', 'emp_m12',
       'naics_ldb', ]
dfldb = dfldb[kcols].merge(dfg, on = ['ldb_num'], how = 'inner')

##naics4
dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
dfzip19 = dfldb.groupby(['fipscnty', 'naics4'])[['emp_m'+str(i) for i in range(1,13)]].sum().reset_index()
dfldb = None

'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
5) load 1) again, load 2020, then 2021 zip code data and merge

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
########################
####### 2020
########################
##geography for 2020



##2020 data
yrm1 = 2018
fips = ''
qtr = ''
own = 5
# 'ui_acct', 'rpt_unit',
cols = [ 'meei', 'naics_code', 'aaemp']
dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
                                                         'rpt_unit': 'run_num',
                                                         'naics_code': 'naics_ldb'})
dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
empcols = ['emp_m'+str(m) for m in range(1,13)]
dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
dfldb = dfldb[['ldb_num', 'naics4', ]+empcols].merge(dfg, on = ['ldb_num'], how = 'inner')
dfldb['fipscnty'] = pd.to_numeric(dfldb['fipscnty'].to_pandas(), errors='coerce')
dfldb = dfldb.dropna()

kdict = {}
tcols = ['ldb_num', 'fipscnty', 'naics4', 'emp_m']
for i in range(1,13):
    rdict = {
        'emp_m'+str(i) : 'emp_m'
    }
    kdict[i] = dfldb.rename(columns=rdict)[tcols].dropna().groupby(['fipscnty', 'naics4'])['emp_m'].sum().reset_index()
dfldb = None

#################################
#################################
####2019
#################################
#################################

##2020 data
yrm1 = 2019
fips = ''
qtr = ''
own = 5
# 'ui_acct', 'rpt_unit',
cols = [ 'meei', 'naics_code', 'aaemp']
dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
                                                         'rpt_unit': 'run_num',
                                                         'naics_code': 'naics_ldb'})
dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
empcols = ['emp_m'+str(m) for m in range(1,13)]
dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
dfldb = dfldb[['ldb_num', 'naics4', ]+empcols].merge(dfg, on = ['ldb_num'], how = 'inner')
dfldb['fipscnty'] = pd.to_numeric(dfldb['fipscnty'].to_pandas(), errors='coerce')
dfldb = dfldb.dropna()
dfg = None

tcols = ['ldb_num', 'fipscnty', 'naics4', 'emp_m']
for i in range(1,13):
    j = i + 12
    rdict = {
        'emp_m'+str(i) : 'emp_m'
    }
    kdict[j] = dfldb.rename(columns=rdict)[tcols].dropna().groupby(['fipscnty', 'naics4'])['emp_m'].sum().reset_index()
dfldb = None
'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
6) post-2019 employment by zip-naics4 by month

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
dfzip19['fipscnty'] = pd.to_numeric(dfzip19['fipscnty'].to_pandas(), errors='coerce')
mc = ['fipscnty', 'naics4']
for i in range(1,25):
    j = i % 12
    if j==0:
        j = 12
    kcols = mc + ['emp_m'+ str(j)]
    renamedict = {'emp_m'+ str(j) : 'emp_19'}
    kdict[i].merge(dfzip19[kcols].rename(columns = renamedict), on = mc, how = 'outer').to_csv(dataloc + 'pppfiles/' + filename + '_' + str(i) + '.csv')



'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- wages
1) get post-2019 LDBs from employment file
************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
# ###use same ldblist as above
# df1 = cudf.read_csv(dataloc + 'pppfiles/' + 'ANppp_ldbv1' + '_wage.csv')
# ldblist = df1['ldb_num'].to_pandas().unique().tolist()
# '''
# ************************************************************************************************
# ************************************************************************************************
# ************************************************************************************************
# - employment
# 2) get 2019 qcew data and keep ldbs from 1)
# 3) merge 2019 zip code
# 4) get monthly employment by zip code and naics 4 digit
#
# ************************************************************************************************
# ************************************************************************************************
# ************************************************************************************************
# '''
# ######get prev year seasonal emp data
# yrm1 = 2018
# fips = ''
# qtr = ''
# own = 5
# # 'ui_acct', 'rpt_unit',
# cols = ['wages', 'meei', 'naics_code']
# dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
#                                                          'rpt_unit': 'run_num',
#                                                          'naics_code': 'naics_ldb'})
# dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
# ##geography
# dfg = cudf.read_csv(dataloc + 'ldb/address/' + 'ldbaddressesv1_2019.psv', sep='|')[['ldb_num','cnty', 'fipsstate']].dropna().drop_duplicates(subset='ldb_num', keep = 'first')
# dfg['fipscnty'] = dfg['fipsstate']*1000 + dfg['cnty']
# dfg = cudf.concat([dfg[['ldb_num', 'fipscnty']], cudf.read_csv(dataloc + 'ldb/newgeog' + '20' + '.psv', sep='|')[['ldb_num', 'cnty']].dropna().rename(columns = {'cnty' : 'fipscnty'})]).drop_duplicates(subset='ldb_num', keep = 'first')
#
# #merge
# kcols = ['ldb_num', 'wages_1','wages_2','wages_3','wages_4',
#        'naics_ldb', ]
# dfldb = dfldb[kcols].merge(dfg, on = ['ldb_num'], how = 'inner')
#
# ##naics4
# dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
# dfzip19 = dfldb.groupby(['fipscnty', 'naics4'])[['wages_'+str(i) for i in range(1,5)]].sum().reset_index()
# dfldb = None
#
# '''
# ************************************************************************************************
# ************************************************************************************************
# ************************************************************************************************
# - wages
# 5) load 1) again, load 2020, then 2021 zip code data and merge
#
# ************************************************************************************************
# ************************************************************************************************
# ************************************************************************************************
# '''
# ########################
# ####### 2020
# ########################
# ##geography for 2020
#
# ##2020 data
# yrm1 = 2019
# fips = ''
# qtr = ''
# own = 5
# # 'ui_acct', 'rpt_unit',
# cols = [ 'meei', 'naics_code']
# dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
#                                                          'rpt_unit': 'run_num',
#                                                          'naics_code': 'naics_ldb'})
# dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
# dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
# dfldb = dfldb[['ldb_num', 'naics4', ]].merge(dfg, on = ['ldb_num'], how = 'inner')
# dfldb['fipscnty'] = pd.to_numeric(dfldb['fipscnty'].to_pandas(), errors='coerce')
# dfldb = dfldb.dropna()
# dfg = None
# kdict = {}
# for i in range(1,13):
#     cond = (df1['num_month'] == i)
#     kdict[i] = df1[cond].to_pandas().merge(dfldb.to_pandas(), on = ['ldb_num'], how = 'inner').dropna().groupby(['fipscnty', 'naics4'])['wage_m'].sum().reset_index()
# dfldb = None
# '''
# ************************************************************************************************
# ************************************************************************************************
# ************************************************************************************************
# 6) post-2019 employment by zip-naics4 by month
#
# ************************************************************************************************
# ************************************************************************************************
# ************************************************************************************************
# '''
# dfzip19['fipscnty'] = pd.to_numeric(dfzip19['fipscnty'].to_pandas(), errors='coerce')
# mc = ['fipscnty', 'naics4']
# for i in range(1,13):
#     j = ((i-1) // 3) + 1
#     if j > 4:
#         j = j - 4
#     kcols = mc + ['wages_'+ str(j)]
#     renamedict = {'wages_'+ str(j) : 'wages_19'}
#     kdict[i].merge(dfzip19[kcols].to_pandas().rename(columns = renamedict), on = mc, how = 'outer').to_csv(dataloc + 'pppfiles/' + filename + '_wage' + str(i) + '.csv')
#
